import pandas as pd
import numpy as np
import os
import plotly
import plotly.express as px
import scipy as sc
from scipy import stats
import plotly.graph_objects as go
# The CSV path below is relative to the current working directory. If the file
# lives elsewhere, change directory first (e.g. with os.chdir) or use a full path.
# Read the combined dataset into a DataFrame.
happiness = pd.read_csv("Final Dataset csv.csv")
# Store the year as text so it can be matched against string labels such as "2015".
happiness = happiness.assign(Year=happiness["Year"].astype(str))
Source: https://www.kaggle.com/unsdsn/world-happiness (2015-2019) datasets. Source: https://www.kaggle.com/londeen/world-happiness-report-2020 (2020) dataset. Source: https://www.kaggle.com/ajaypalsinghlo/world-happiness-report-2021 (2021) dataset.
This dataset combines the data from the 2015-2021 reports.
# Slice out single years so their descriptive statistics can be compared:
# 2015 is the first year in the data, 2021 the last, and 2020 is kept for reference.
happiness_2015 = happiness.loc[happiness["Year"] == "2015"]
happiness_2021 = happiness.loc[happiness["Year"] == "2021"]
happiness_2020 = happiness.loc[happiness["Year"] == "2020"]
# Summary statistics of the numeric columns for 2015.
happiness_2015.describe()
| | Happiness Rank | Happiness_Score | Economy (GDP per Capita) | Family | Health (Life Expectancy) | Freedom | Trust (Government Corruption) | Generosity | Dystopia Residual |
|---|---|---|---|---|---|---|---|---|---|
| count | 158.000000 | 158.000000 | 158.000000 | 158.000000 | 158.000000 | 158.000000 | 158.000000 | 158.000000 | 158.000000 |
| mean | 79.493671 | 5.375734 | 0.846137 | 0.991046 | 0.630259 | 0.428615 | 0.143422 | 0.237296 | 2.098977 |
| std | 45.754363 | 1.145010 | 0.403121 | 0.272369 | 0.247078 | 0.150693 | 0.120034 | 0.126685 | 0.553550 |
| min | 1.000000 | 2.839000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.328580 |
| 25% | 40.250000 | 4.526000 | 0.545808 | 0.856823 | 0.439185 | 0.328330 | 0.061675 | 0.150553 | 1.759410 |
| 50% | 79.500000 | 5.232500 | 0.910245 | 1.029510 | 0.696705 | 0.435515 | 0.107220 | 0.216130 | 2.095415 |
| 75% | 118.750000 | 6.243750 | 1.158448 | 1.214405 | 0.811013 | 0.549092 | 0.180255 | 0.309883 | 2.462415 |
| max | 158.000000 | 7.587000 | 1.690420 | 1.402230 | 1.025250 | 0.669730 | 0.551910 | 0.795880 | 3.602140 |
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns for 2021.
happiness_2021.describe()
| | Happiness Rank | Happiness_Score | Economy (GDP per Capita) | Family | Health (Life Expectancy) | Freedom | Trust (Government Corruption) | Generosity | Dystopia Residual |
|---|---|---|---|---|---|---|---|---|---|
| count | 149.000000 | 149.000000 | 149.000000 | 149.000000 | 149.000000 | 149.000000 | 149.000000 | 149.000000 | 0.0 |
| mean | 75.000000 | 5.532839 | 0.977161 | 0.793315 | 0.520161 | 0.498711 | 0.135141 | 0.178047 | NaN |
| std | 43.156691 | 1.073924 | 0.404740 | 0.258871 | 0.213019 | 0.137888 | 0.114361 | 0.098270 | NaN |
| min | 1.000000 | 2.523000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | NaN |
| 25% | 38.000000 | 4.852000 | 0.666000 | 0.647000 | 0.357000 | 0.409000 | 0.060000 | 0.105000 | NaN |
| 50% | 75.000000 | 5.534000 | 1.025000 | 0.832000 | 0.571000 | 0.514000 | 0.101000 | 0.164000 | NaN |
| 75% | 112.000000 | 6.255000 | 1.323000 | 0.996000 | 0.665000 | 0.603000 | 0.174000 | 0.239000 | NaN |
| max | 149.000000 | 7.842000 | 1.751000 | 1.172000 | 0.897000 | 0.716000 | 0.547000 | 0.541000 | NaN |
These basic descriptive tables suggest several key findings:
The maximum happiness score is higher in 2021 (7.842) than in 2015 (7.587).
In contrast, the minimum happiness score is lower in 2021 (2.523) than in 2015 (2.839). The yearly minimum also appears to have been decreasing from 2018 to 2021; the 2020 minimum shown below is 2.567.
However, the average happiness score across all regions is higher in 2021 (5.533) than in 2015 (5.376), which suggests that people are, on average, happier.
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns for 2020.
happiness_2020.describe()
| | Happiness Rank | Happiness_Score | Economy (GDP per Capita) | Family | Health (Life Expectancy) | Freedom | Trust (Government Corruption) | Generosity | Dystopia Residual |
|---|---|---|---|---|---|---|---|---|---|
| count | 153.000000 | 153.00000 | 153.000000 | 153.000000 | 153.000000 | 153.000000 | 153.000000 | 153.000000 | 0.0 |
| mean | 77.000000 | 5.47324 | 0.868771 | 1.155607 | 0.692869 | 0.463583 | 0.130718 | 0.189375 | NaN |
| std | 44.311398 | 1.11227 | 0.372416 | 0.286866 | 0.254094 | 0.141172 | 0.113097 | 0.100401 | NaN |
| min | 1.000000 | 2.56690 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | NaN |
| 25% | 39.000000 | 4.72410 | 0.575862 | 0.986718 | 0.495443 | 0.381457 | 0.055805 | 0.115006 | NaN |
| 50% | 77.000000 | 5.51500 | 0.918549 | 1.203987 | 0.759818 | 0.483293 | 0.098435 | 0.176745 | NaN |
| 75% | 115.000000 | 6.22850 | 1.169229 | 1.387139 | 0.867249 | 0.576665 | 0.163064 | 0.255510 | NaN |
| max | 153.000000 | 7.80870 | 1.536676 | 1.547567 | 1.137814 | 0.693270 | 0.533162 | 0.569814 | NaN |
Next, independent two-sample t-tests are conducted on a small selection of regions to determine whether the average happiness score differs between regions within the same continent.
# Differences within Europe:
# independent two-sample t-test on the happiness scores of Western Europe versus
# Central and Eastern Europe, using the rows from every year in the dataset.
westerneurope = happiness.loc[happiness["Region"] == "Western Europe"]
easteurope = happiness.loc[happiness["Region"] == "Central and Eastern Europe"]
europe_test = stats.ttest_ind(
    westerneurope["Happiness_Score"],
    easteurope["Happiness_Score"],
)
# Only the p-value is reported.
europe_pvalue = europe_test.pvalue
print(europe_pvalue)
5.343912590391673e-52
# Differences within America:
# independent two-sample t-test on the happiness scores of North America versus
# Latin America and Caribbean, using the rows from every year in the dataset.
northamerica = happiness.loc[happiness["Region"] == "North America"]
southamerica = happiness.loc[happiness["Region"] == "Latin America and Caribbean"]
america_test = stats.ttest_ind(
    northamerica["Happiness_Score"],
    southamerica["Happiness_Score"],
)
# Only the p-value is reported.
america_pvalue = america_test.pvalue
print(america_pvalue)
1.2487995911146715e-08
Both p-values (about 5.3e-52 for Europe and 1.2e-08 for the Americas) are far below alpha = 0.05, so the tests indicate significant differences in mean happiness score between the regions compared within each continent.
# Animated horizontal bar chart of the highest happiness score in each region, per year.
#
# px.bar draws one bar segment per input row and, in its default bar mode, stacks
# segments that share the same category. Passing the per-country rows directly would
# therefore make every regional bar the stacked sum of all its countries' scores
# (clipped at range_x), not the maximum named in the title. The data is reduced to a
# single maximum per (Year, Region) first so each bar shows exactly that value.
# NOTE(review): rows with a missing Region are dropped by groupby — confirm none exist.
max_happiness = happiness.groupby(["Year", "Region"], as_index=False)["Happiness_Score"].max()
px.bar(
    max_happiness,
    x="Happiness_Score",
    y="Region",
    color="Region",
    animation_frame="Year",
    animation_group="Region",
    hover_name="Region",
    range_x=[0, 10],
    color_discrete_sequence=px.colors.qualitative.Safe,
    title="Maximum Happiness Scores",
    orientation="h",
)
# Work with the 2019 and 2021 rows only.
happiness_2019 = pd.DataFrame(happiness[happiness["Year"] == "2019"])
happiness_2021 = pd.DataFrame(happiness[happiness["Year"] == "2021"])


def _rows_for(frame, region):
    """Return the rows of *frame* whose Region column equals *region*."""
    return frame.loc[frame["Region"] == region]


# Separate the regions from each other so that a different colour can be applied to each (2021).
westerneurope_21 = _rows_for(happiness_2021, "Western Europe")
easteurope_21 = _rows_for(happiness_2021, "Central and Eastern Europe")
northafrica_21 = _rows_for(happiness_2021, "Middle East and North Africa")
africa_21 = _rows_for(happiness_2021, "Sub-Saharan Africa")
northamerica_21 = _rows_for(happiness_2021, "North America")
southamerica_21 = _rows_for(happiness_2021, "Latin America and Caribbean")
eastasia_21 = _rows_for(happiness_2021, "Eastern Asia")
southeastasia_21 = _rows_for(happiness_2021, "Southeastern Asia")
southasia_21 = _rows_for(happiness_2021, "Southern Asia")
australianz_21 = _rows_for(happiness_2021, "Australia and New Zealand")

# Separate the regions from each other so that a different colour can be applied to each (2019).
# Note: these names replace any earlier all-years subsets with the same names.
westerneurope = _rows_for(happiness_2019, "Western Europe")
easteurope = _rows_for(happiness_2019, "Central and Eastern Europe")
northafrica = _rows_for(happiness_2019, "Middle East and North Africa")
africa = _rows_for(happiness_2019, "Sub-Saharan Africa")
northamerica = _rows_for(happiness_2019, "North America")
southamerica = _rows_for(happiness_2019, "Latin America and Caribbean")
eastasia = _rows_for(happiness_2019, "Eastern Asia")
southeastasia = _rows_for(happiness_2019, "Southeastern Asia")
southasia = _rows_for(happiness_2019, "Southern Asia")
australianz = _rows_for(happiness_2019, "Australia and New Zealand")
When this visualisation is first produced, all scatter plots appear on the screen at once. To view the overall scatter plot showing the data for both years, click the "Overall" button. To see the separate regional scatter plots for a single year, click the button for that year ("Health 2019" or "Health 2021").
# Interactive scatter plot of the health contribution against the happiness score for
# 2019 and 2021. The figure holds three groups of traces, in this order:
#   1. two "overall" traces (all 2019 countries, all 2021 countries),
#   2. one trace per region for 2019,
#   3. one trace per region for 2021.
# Buttons switch which group is visible; on first display every trace is shown.
HEALTH_COLUMN = "Health (Life Expectancy)"
SCORE_COLUMN = "Happiness_Score"

# (Region value in the data, legend label, marker colour) for each regional trace.
# A single colour per region is shared by both years; previously the 2021
# Sub-Saharan Africa trace used rgb(51,36,136) instead of rgb(51,34,136).
REGION_STYLES = [
    ("Western Europe", "Western Europe", "rgb(204,102,119)"),
    ("Central and Eastern Europe", "Central and Eastern Europe", "rgb(221,204,119)"),
    ("Middle East and North Africa", "Middle East and North Africa", "rgb(17,119,51)"),
    ("Sub-Saharan Africa", "Sub-Saharan Africa", "rgb(51,34,136)"),
    ("North America", "North America", "rgb(170,68,153)"),
    ("Latin America and Caribbean", "Latin America and Caribbean", "rgb(68,170,153)"),
    ("Eastern Asia", "Eastern Asia", "rgb(153,153,51)"),
    ("Southeastern Asia", "Southeastern Asia", "rgb(136,34,85)"),
    ("Southern Asia", "South Asia", "rgb(102,17,0)"),
    ("Australia and New Zealand", "Australia and New Zealand", "rgb(136,136,136)"),
]
OVERALL_TRACE_COUNT = 2


def _health_scatter(data, name, color, **extra):
    """Return a marker-only trace of health contribution vs. happiness score.

    The country name is attached as hover text; *extra* is passed through to
    go.Scatter for per-trace options.
    """
    return go.Scatter(
        x=data[HEALTH_COLUMN],
        y=data[SCORE_COLUMN],
        mode="markers",
        marker_color=color,
        name=name,
        hovertext=data["Country"],
        **extra,
    )


def _visibility(show_overall, show_2019, show_2021):
    """Build the per-trace visibility list in the order the traces are added."""
    region_count = len(REGION_STYLES)
    return (
        [show_overall] * OVERALL_TRACE_COUNT
        + [show_2019] * region_count
        + [show_2021] * region_count
    )


fig = go.Figure()

# Overall traces to compare the two years; namelength=0 hides the trace name in the hover box.
fig.add_trace(_health_scatter(happiness_2019, "Health 2019", "rgb(136, 203, 238)",
                              hoverlabel=dict(namelength=0)))
fig.add_trace(_health_scatter(happiness_2021, "Health 2021", "rgb(0,0,0)",
                              hoverlabel=dict(namelength=0)))

# Regional traces: all regions for 2019 first, then all regions for 2021.
# Every regional trace keeps its name in the hover box (previously only the 2019
# Western Europe trace suppressed it, unlike the other nineteen).
for year_data in (happiness_2019, happiness_2021):
    for region, legend_label, color in REGION_STYLES:
        region_rows = year_data[year_data["Region"] == region]
        fig.add_trace(_health_scatter(region_rows, legend_label, color))

# Buttons to change which group of traces is viewed. Each button updates the traces'
# visibility and the figure title. The title is addressed by its explicit `title.text`
# path. NOTE(review): recent plotly.js releases are understood to reject a bare string
# under `title` in relayout data — confirm against the installed plotly version.
view_buttons = [
    dict(label="Overall", method="update",
         args=[{"visible": _visibility(True, False, False)}, {"title.text": "Overall Health"}]),
    dict(label="Health 2019", method="update",
         args=[{"visible": _visibility(False, True, False)}, {"title.text": "Health 2019"}]),
    dict(label="Health 2021", method="update",
         args=[{"visible": _visibility(False, False, True)}, {"title.text": "Health 2021"}]),
]
fig.update_layout(
    updatemenus=[go.layout.Updatemenu(type="buttons", direction="right", buttons=view_buttons)]
)

# Fixed axis ranges so the view does not rescale when switching between buttons.
fig.update_yaxes(range=[0, 10], constrain="domain")
fig.update_xaxes(range=[0, 1.2], constrain="domain")
fig.update_layout(
    title="The relationship between health and happiness before and during the COVID-19 pandemic",
    xaxis_title="Health (contribution)",
    yaxis_title="Happiness Score",
    template="plotly_white",
)
fig.show()
The Health contribution (the "Health (Life Expectancy)" column) represents how much health contributes to a country's happiness score.